## Rueda 2008 WP

library(foreign)
library(Amelia)

## Read the original replication data (Stata .dta file) and take a first
## look at its leading rows and its dimensions
r2008 <- read.dta(file = "R2008 WP Rep Data.dta")
head(r2008)
dim(r2008)

## Remove the identifier column `country`
r2008[["country"]] <- NULL

## Remove the derived dummy variables. The positions 26-40 are counted
## AFTER `country` has been dropped above, so the order of these two steps
## matters.
r2008 <- r2008[, -(26:40)]

## Re-check the dimensions. Original author's note: 25 variables remain, so
## no further reduction of the variable set is necessary.
dim(r2008)

## Stage 3: Imputation
## What is the average percentage of missing data?
## Count the missing values in each column of `x`.
##
## Args:
##   x: A data frame or matrix (anything with two dimensions).
##
## Returns:
##   An unnamed integer vector holding one NA count per column, in column
##   order.
NAs <- function(x) {
    # is.na() yields a logical matrix for data frames and matrices alike, so
    # colSums() can count the NAs column by column in one vectorised step.
    # This avoids apply(), which would first coerce a mixed-type data frame
    # to a character matrix. as.integer() keeps the original return type
    # (integer, no names), since colSums() returns a named double vector.
    as.integer(colSums(is.na(x)))
}
## Per-variable NA counts, followed by the mean share of missing values
## across variables, expressed in percent
NAs(r2008)
100 * mean(NAs(r2008) / nrow(r2008))

## Thus: 5 imputations

## Fix the RNG seed so the imputations are reproducible. R has no octal
## literals, so 02138 is read as the number 2138.
set.seed(02138)

## Multiple imputation with Amelia:
##   m        - number of imputed data sets to create
##   ts / cs  - names of the time and cross-section (unit) variables
##   polytime - order of the polynomial of time (3 = cubic)
##   lags     - variables for which lagged values are added to the
##              imputation model
##   empri    - ridge prior, set here to 1% of the number of rows
r2008.out <- amelia(
    r2008,
    m = 5,
    ts = "year",
    cs = "count",
    polytime = 3,
    lags = c(
        "govpxcor", "govem", "generos", "min",
        "govemxcor", "genxcor", "minxcor", "ia5010"
    ),
    empri = 0.01 * nrow(r2008)
)

## Export the imputed data in Stata format. With separate = FALSE the
## imputations are written to a single file rather than one file each.
write.amelia(
    obj = r2008.out,
    file.stem = "R2008 WP Imp Data",
    format = "dta",
    separate = FALSE
)